const noteCache = require('./note_cache');
const noteCacheService = require('./note_cache_service.js');
const dateUtils = require('../date_utils');

const IGNORED_ATTR_NAMES = [
    "includenotelink",
    "internallink",
    "imagelink",
    "relationmaplink",
    "template",
    "disableversioning",
    "archived",
    "hidepromotedattributes",
    "keyboardshortcut",
    "bookzoomlevel",
    "noteinfowidgetdisabled",
    "linkmapwidgetdisabled",
    "noterevisionswidgetdisabled",
    "whatlinksherewidgetdisabled",
    "similarnoteswidgetdisabled",
    "disableinclusion",
    "rendernote",
    "pageurl",
];

function filterLabelValue(value) {
    return value
        .replace(/https?:\/\//ig, "")
        .replace(/www\./ig, "")
        .replace(/(\.net|\.com|\.org|\.info|\.edu)/ig, "");
}

/**
 * @param {Note} note
 */
function buildRewardMap(note) {
    const map = {};

    function addToRewardMap(text, rewardFactor) {
        if (!text) {
            return;
        }

        for (const word of splitToWords(text)) {
            if (word) {
                map[word] = map[word] || 0;

                // reward grows with the length of matched string
                const length = word.length
                    - 0.9; // to penalize specifically very short words - 1 and 2 characters

                map[word] += rewardFactor * Math.pow(length, 0.7);
            }
        }
    }

    for (const ancestorNote of note.ancestors) {
        if (ancestorNote.noteId === 'root') {
            continue;
        }

        if (ancestorNote.isDecrypted) {
            addToRewardMap(ancestorNote.title, 0.3);
        }

        for (const branch of ancestorNote.parentBranches) {
            addToRewardMap(branch.prefix, 0.3);
        }
    }

    addToRewardMap(trimMime(note.mime), 0.5);

    if (note.isDecrypted) {
        addToRewardMap(note.title, 1);
    }

    for (const branch of note.parentBranches) {
        addToRewardMap(branch.prefix, 1);
    }

    for (const attr of note.attributes) {
        if (attr.name.startsWith('child:')
            || attr.name.startsWith('relation:')
            || attr.name.startsWith('label:')) {
            continue;
        }

        // inherited notes get small penalization
        const reward = note.noteId === attr.noteId ? 0.8 : 0.5;

        if (!IGNORED_ATTR_NAMES.includes(attr.name)) {
            addToRewardMap(attr.name, reward);
        }

        addToRewardMap(filterLabelValue(attr.value), reward);
    }

    return map;
}

const mimeCache = {};

function trimMime(mime) {
    if (!mime || mime === 'text/html') {
        return;
    }

    if (!(mime in mimeCache)) {
        const chunks = mime.split('/');

        let str = "";

        if (chunks.length >= 2) {
            // we're not interested in 'text/' or 'application/' prefix
            str = chunks[1];

            if (str.startsWith('-x')) {
                str = str.substr(2);
            }
        }

        mimeCache[mime] = str;
        mimeCache[mime] = str;
    }

    return mimeCache[mime];
}

function buildDateLimits(baseNote) {
    const dateCreatedTs = dateUtils.parseDateTime(baseNote.utcDateCreated);

    return {
        minDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 3600)),
        minExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 5)),
        maxExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 5)),
        maxDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 3600)),
    };
}

const wordCache = {};

const WORD_BLACKLIST = [
    "a", "the", "in", "for", "from", "but", "s", "so", "if", "while", "until",
    "whether", "after", "before", "because", "since", "when", "where", "how",
    "than", "then", "and", "either", "or", "neither", "nor", "both", "also"
];

function splitToWords(text) {
    let words = wordCache[text];

    if (!words) {
        wordCache[text] = words = text.toLowerCase().split(/\W+/);

        for (const idx in words) {
            if (WORD_BLACKLIST.includes(words[idx])) {
                words[idx] = "";
            }
            // special case for english plurals
            else if (words[idx].length > 2 && words[idx].endsWith("es")) {
                words[idx] = words[idx].substr(0, words[idx] - 2);
            }
            else if (words[idx].length > 1 && words[idx].endsWith("s")) {
                words[idx] = words[idx].substr(0, words[idx] - 1);
            }
        }
    }

    return words;
}

async function findSimilarNotes(noteId) {
    const results = [];
    let i = 0;

    const baseNote = noteCache.notes[noteId];

    if (!baseNote) {
        return [];
    }

    const dateLimits = buildDateLimits(baseNote);
    const rewardMap = buildRewardMap(baseNote);
    const ancestorRewardCache = {};
    const ancestorNoteIds = new Set(baseNote.ancestors.map(note => note.noteId));

    function gatherRewards(text, factor = 1) {
        if (!text) {
            return 0;
        }

        let counter = 0;

        // when the title is very long then weight of each individual word should be lower
        // also pretty important in e.g. long URLs in label values
        const lengthPenalization = 1 / Math.pow(text.length, 0.3);

        for (const word of splitToWords(text)) {
            counter += rewardMap[word] * factor * lengthPenalization || 0;
        }

        return counter;
    }

    function gatherAncestorRewards(note) {
        if (!(note.noteId in ancestorRewardCache)) {
            let score = 0;

            for (const parentNote of note.parents) {
                if (!ancestorNoteIds.has(parentNote.noteId)) {
                    if (parentNote.isDecrypted) {
                        score += gatherRewards(parentNote.title, 0.3);
                    }

                    for (const branch of parentNote.parentBranches) {
                        score += gatherRewards(branch.prefix, 0.3)
                               + gatherAncestorRewards(branch.parentNote);
                    }
                }
            }

            ancestorRewardCache[note.noteId] = score;
        }

        return ancestorRewardCache[note.noteId];
    }

    function computeScore(candidateNote) {
        let score = gatherRewards(trimMime(candidateNote.mime))
                  + gatherAncestorRewards(candidateNote);

        if (candidateNote.isDecrypted) {
            score += gatherRewards(candidateNote.title);
        }

        for (const branch of candidateNote.parentBranches) {
            score += gatherRewards(branch.prefix);
        }

        for (const attr of candidateNote.attributes) {
            if (attr.name.startsWith('child:')
                || attr.name.startsWith('relation:')
                || attr.name.startsWith('label:')) {
                continue;
            }

            if (!IGNORED_ATTR_NAMES.includes(attr.name)) {
                score += gatherRewards(attr.name);
            }

            score += gatherRewards(attr.value);
        }

        if (candidateNote.type === baseNote.type) {
            score += 0.2;
        }

        /**
         * We want to improve standing of notes which have been created in similar time to each other since
         * there's a good chance they are related.
         *
         * But there's an exception - if they were created really close to each other (withing few seconds) then
         * they are probably part of the import and not created by hand - these OTOH should not benefit.
         */
        const {utcDateCreated} = candidateNote;

        if (utcDateCreated < dateLimits.minExcludedDate && utcDateCreated > dateLimits.maxExcludedDate) {
            if (utcDateCreated >= dateLimits.minDate && utcDateCreated <= dateLimits.maxDate) {
                score += 1;
            }
            else if (utcDateCreated.substr(0, 10) === dateLimits.minDate.substr(0, 10) || utcDateCreated.substr(0, 10) === dateLimits.maxDate.substr(0, 10)) {
                // smaller bonus when outside of the window but within same date
                score += 0.5;
            }
        }

        return score;
    }

    for (const candidateNote of Object.values(noteCache.notes)) {
        if (candidateNote.noteId === baseNote.noteId) {
            continue;
        }

        let score = computeScore(candidateNote);

        if (score >= 1.5) {
            const notePath = noteCacheService.getSomePath(candidateNote);

            // this takes care of note hoisting
            if (!notePath) {
                return;
            }

            if (noteCacheService.isNotePathArchived(notePath)) {
                score -= 0.5; // archived penalization
            }

            results.push({score, notePath, noteId: candidateNote.noteId});
        }

        i++;

        if (i % 1000 === 0) {
            await setImmediatePromise();
        }
    }

    results.sort((a, b) => a.score > b.score ? -1 : 1);

    results.forEach(r => {
        const note = noteCache.notes[r.noteId];

        if (!note.isDecrypted) {
            console.log(note.pojo);
        }
    });

    console.log(rewardMap);

    return results.length > 200 ? results.slice(0, 200) : results;
}

/**
 * Point of this is to break up long running sync process to avoid blocking
 * see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
 */
function setImmediatePromise() {
    return new Promise((resolve) => {
        setTimeout(() => resolve(), 0);
    });
}

module.exports = {
    findSimilarNotes
};
